# library(BiocManager)
# BiocManager::install("DESeq2")
library(dplyr)
Attaching package: ‘dplyr’
The following object is masked from ‘package:Biobase’:
combine
The following objects are masked from ‘package:BiocGenerics’:
combine, intersect, setdiff, union
The following objects are masked from ‘package:stats’:
filter, lag
The following objects are masked from ‘package:base’:
intersect, setdiff, setequal, union
library(readr)
library(DESeq2)
Loading required package: S4Vectors
Loading required package: stats4
Attaching package: ‘S4Vectors’
The following objects are masked from ‘package:dplyr’:
first, rename
The following objects are masked from ‘package:base’:
expand.grid, I, unname
Loading required package: IRanges
Attaching package: ‘IRanges’
The following objects are masked from ‘package:dplyr’:
collapse, desc, slice
Loading required package: GenomicRanges
Loading required package: GenomeInfoDb
Loading required package: SummarizedExperiment
Loading required package: MatrixGenerics
Loading required package: matrixStats
Attaching package: ‘matrixStats’
The following object is masked from ‘package:dplyr’:
count
The following objects are masked from ‘package:Biobase’:
anyMissing, rowMedians
Attaching package: ‘MatrixGenerics’
The following objects are masked from ‘package:matrixStats’:
colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse, colCounts, colCummaxs, colCummins, colCumprods,
colCumsums, colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs, colMads, colMaxs, colMeans2, colMedians,
colMins, colOrderStats, colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds, colSums2, colTabulates,
colVarDiffs, colVars, colWeightedMads, colWeightedMeans, colWeightedMedians, colWeightedSds, colWeightedVars,
rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet, rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps, rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians,
rowMins, rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks, rowSdDiffs, rowSds, rowSums2, rowTabulates,
rowVarDiffs, rowVars, rowWeightedMads, rowWeightedMeans, rowWeightedMedians, rowWeightedSds, rowWeightedVars
The following object is masked from ‘package:Biobase’:
rowMedians
library(readr)
gene_data <- read_csv('Breast_GSE45827.csv')
Rows: 151 Columns: 54677
── Column specification ─────────────────────────────────────────────
Delimiter: ","
chr (1): type
dbl (54676): samples, 1007_s_at, 1053_at, 117_at, 121_at, 1255_g_...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(gene_data)
binary_class_data <- gene_data %>%
mutate(type = case_when(
type == 'normal' ~ 'healthy',
TRUE ~ 'cancer'
)
)
binary_class_data
binary_class_data <- as.data.frame(t(binary_class_data))
head(binary_class_data)
colnames(binary_class_data) <- binary_class_data[1,]
coldata <- as.data.frame(t(binary_class_data[c(2),]))
binary_class_data <- binary_class_data[-c(1,2),]
binary_class_data
coldata
converted_binary <- type.convert(binary_class_data)
converted_binary
has.neg <- apply(converted_binary, 1, function(row) any(row < 0))
which(has.neg)
dds <- DESeqDataSetFromMatrix(countData = converted_binary,
colData = coldata,
design = ~ type)
dds
library(affy)
gene_raw_data <- ReadAffy(celfile.path = "/Users/alex/Documents/GitHub/Breast-Cancer-Gene-Regulatory-Network-and-Risk-Prediction/Data/GSE45827_RAW/")
assay_data <- assayData(gene_raw_data)$exprs
pheno_data <- phenoData(gene_raw_data)
feature_data <- featureData(gene_raw_data)
arraysRMA = rma(gene_raw_data, normalize = FALSE)
log_arraysRMAtable=exprs(arraysRMA)
count_table = as.data.frame(round(2^log_arraysRMAtable))
head(count_table)
colnames(count_table) <- substr(colnames(count_table),8,10)
head(count_table)
count_table <- count_table[-c(3,6,8,10,13,16,19,24,28,31,34,36,42,48,50,52,54,57,62,71,76,82,90)]
count_table
colnames(count_table) <- as.integer(colnames(count_table))
count_table
coldata <- as.data.frame(as.integer(colnames(count_table)))
colnames(coldata) <- 'Sample'
coldata <- coldata %>%
mutate(type = case_when(
Sample > 168 & Sample < 180 ~ 'non-cancer',
TRUE ~ 'cancer'
)
)
coldata
dds <- DESeqDataSetFromMatrix(countData = count_table,
colData = coldata,
design = ~ type)
dds
# Pre-filtering
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep,]
dds$type <- relevel(dds$type, ref = "non-cancer")
dds <- DESeq(dds)
res <- results(dds)
res
resultsNames(dds)
# Log Fold Change Shrink
resLFC <- lfcShrink(dds, coef="type_cancer_vs_non.cancer", type="apeglm")
resLFC
resOrdered <- res[order(res$pvalue),]
resOrdered
log2 fold change (MLE): type cancer vs non.cancer
Wald test p-value: type cancer vs non.cancer
DataFrame with 54675 rows and 6 columns
baseMean log2FoldChange lfcSE stat pvalue padj
<numeric> <numeric> <numeric> <numeric> <numeric> <numeric>
243689_s_at 25.9957 -3.89543 0.139157 -27.9930 1.97637e-172 1.08058e-167
224012_at 17.1237 -3.14268 0.129003 -24.3614 4.38935e-131 1.19994e-126
207092_at 85.7769 -5.70338 0.236180 -24.1484 7.75292e-129 1.41297e-124
211565_at 34.8702 -3.25637 0.136013 -23.9416 1.13155e-126 1.54669e-122
1552509_a_at 27.1727 -4.41723 0.185255 -23.8441 1.16619e-125 1.27523e-121
... ... ... ... ... ... ...
215244_at 22.8869 5.79663e-05 0.1330159 4.35785e-04 0.999652 0.999725
216200_at 14.0919 -4.95852e-05 0.1628031 -3.04572e-04 0.999757 0.999812
205735_s_at 18.0396 4.60511e-05 0.1960345 2.34913e-04 0.999813 0.999849
1553701_a_at 82.9793 -1.54787e-05 0.0804177 -1.92479e-04 0.999846 0.999865
1555629_at 22.8510 -1.39734e-05 0.1515850 -9.21817e-05 0.999926 0.999926
GDE = data.frame(resOrdered$log2FoldChange, resOrdered$pvalue)
rownames(GDE) = resOrdered@rownames
colnames(GDE) = c('Log2FoldChange','Ordered$pvalue')
GDE
write.csv(GDE, "GDE_Outcome.csv", row.names = TRUE)
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3J9CiMgbGlicmFyeShCaW9jTWFuYWdlcikKIyBCaW9jTWFuYWdlcjo6aW5zdGFsbCgiREVTZXEyIikKbGlicmFyeShkcGx5cikKbGlicmFyeShyZWFkcikKbGlicmFyeShERVNlcTIpCmBgYAoKYGBge3J9CmdlbmVfZGF0YSA8LSByZWFkX2NzdignQnJlYXN0X0dTRTQ1ODI3LmNzdicpCmhlYWQoZ2VuZV9kYXRhKQpgYGAKCmBgYHtyfQpiaW5hcnlfY2xhc3NfZGF0YSA8LSBnZW5lX2RhdGEgJT4lIAogIG11dGF0ZSh0eXBlID0gY2FzZV93aGVuKAogICAgdHlwZSA9PSAnbm9ybWFsJyB+ICdoZWFsdGh5JywKICAgIFRSVUUgfiAnY2FuY2VyJwogICAgKQogICkKCmJpbmFyeV9jbGFzc19kYXRhCmBgYAoKYGBge3J9CmJpbmFyeV9jbGFzc19kYXRhIDwtIGFzLmRhdGEuZnJhbWUodChiaW5hcnlfY2xhc3NfZGF0YSkpCmhlYWQoYmluYXJ5X2NsYXNzX2RhdGEpCmBgYAoKYGBge3J9CmNvbG5hbWVzKGJpbmFyeV9jbGFzc19kYXRhKSA8LSBiaW5hcnlfY2xhc3NfZGF0YVsxLF0KY29sZGF0YSA8LSBhcy5kYXRhLmZyYW1lKHQoYmluYXJ5X2NsYXNzX2RhdGFbYygyKSxdKSkKYmluYXJ5X2NsYXNzX2RhdGEgPC0gYmluYXJ5X2NsYXNzX2RhdGFbLWMoMSwyKSxdCgpiaW5hcnlfY2xhc3NfZGF0YQpjb2xkYXRhCmBgYAoKYGBge3J9CmNvbnZlcnRlZF9iaW5hcnkgPC0gdHlwZS5jb252ZXJ0KGJpbmFyeV9jbGFzc19kYXRhKQpjb252ZXJ0ZWRfYmluYXJ5CmBgYAoKYGBge3J9Cmhhcy5uZWcgPC0gYXBwbHkoY29udmVydGVkX2JpbmFyeSwgMSwgZnVuY3Rpb24ocm93KSBhbnkocm93IDwgMCkpCndoaWNoKGhhcy5uZWcpCmBgYAoKYGBge3J9CmRkcyA8LSBERVNlcURhdGFTZXRGcm9tTWF0cml4KGNvdW50RGF0YSA9IGNvbnZlcnRlZF9iaW5hcnksCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGNvbERhdGEgPSBjb2xkYXRhLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICBkZXNpZ24gPSB+IHR5cGUpCmRkcwpgYGAKCmBgYHtyfQpsaWJyYXJ5KGFmZnkpCmdlbmVfcmF3X2RhdGEgPC0gUmVhZEFmZnkoY2VsZmlsZS5wYXRoID0gIi9Vc2Vycy9hbGV4L0RvY3VtZW50cy9HaXRIdWIvQnJlYXN0LUNhbmNlci1HZW5lLVJlZ3VsYXRvcnktTmV0d29yay1hbmQtUmlzay1QcmVkaWN0aW9uL0RhdGEvR1NFNDU4MjdfUkFXLyIpCmBgYAoKYGBge3J9CmFzc2F5X2RhdGEgPC0gYXNzYXlEYXRhKGdlbmVfcmF3X2RhdGEpJGV4cHJzCnBoZW5vX2RhdGEgPC0gcGhlbm9EYXRhKGdlbmVfcmF3X2RhdGEpCmZlYXR1cmVfZGF0YSA8LSBmZWF0dXJlRGF0YShnZW5lX3Jhd19kYXRhKQpgYGAKCmBgYHtyfQphcnJheXNSTUEgPSBybWEoZ2VuZV9yYXdfZGF0YSwgbm9ybWFsaXplID0gRkFMU0UpCmBgYAoKYGBge3J9CmxvZ19hcnJheXNSTUF0YWJsZT1leHBycyhhcnJheXNSTUEpCmNvdW50X3RhYmxlID0gYXMuZGF0YS5mcmFtZShyb3VuZCgyXmxvZ19hcnJheXNSTUF0YWJsZSkpCmhlYWQoY291bnRfdGFibGUpCmBgYAoKYGBge3J9CmNvbG5hbWVzKGNvdW50X3RhYmxlKSA8LSBzdWJzdHIoY29sbmFtZXMoY291bnRfdGFibGUpLDgsMTApCmhlYWQoY291bnRfdGFibGUpCmBgYAoKYGBge3J9CmNvdW50X3RhYmxlIDwtIGNvdW50X3RhYmxlWy1jKDMsNiw4LDEwLDEzLDE2LDE5LDI0LDI4LDMxLDM0LDM2LDQyLDQ4LDUwLDUyLDU0LDU3LDYyLDcxLDc2LDgyLDkwKV0KY291bnRfdGFibGUKYGBgCgpgYGB7cn0KY29sbmFtZXMoY291bnRfdGFibGUpIDwtIGFzLmludGVnZXIoY29sbmFtZXMoY291bnRfdGFibGUpKQpjb3VudF90YWJsZQpgYGAKCmBgYHtyfQpjb2xkYXRhIDwtIGFzLmRhdGEuZnJhbWUoYXMuaW50ZWdlcihjb2xuYW1lcyhjb3VudF90YWJsZSkpKQoKY29sbmFtZXMoY29sZGF0YSkgPC0gJ1NhbXBsZScKCmNvbGRhdGEgPC0gY29sZGF0YSAlPiUgCiAgbXV0YXRlKHR5cGUgPSBjYXNlX3doZW4oCiAgICBTYW1wbGUgPiAxNjggJiBTYW1wbGUgPCAxODAgfiAnbm9uLWNhbmNlcicsCiAgICBUUlVFIH4gJ2NhbmNlcicKICAgICkKICApCgpjb2xkYXRhCmBgYAoKYGBge3J9CmRkcyA8LSBERVNlcURhdGFTZXRGcm9tTWF0cml4KGNvdW50RGF0YSA9IGNvdW50X3RhYmxlLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICBjb2xEYXRhID0gY29sZGF0YSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZGVzaWduID0gfiB0eXBlKQpkZHMKYGBgCgpgYGB7cn0KIyBQcmUtZmlsdGVyaW5nCmtlZXAgPC0gcm93U3Vtcyhjb3VudHMoZGRzKSkgPj0gMTAKZGRzIDwtIGRkc1trZWVwLF0KYGBgCgpgYGB7cn0KZGRzJHR5cGUgPC0gcmVsZXZlbChkZHMkdHlwZSwgcmVmID0gIm5vbi1jYW5jZXIiKQpgYGAKCmBgYHtyfQpkZHMgPC0gREVTZXEoZGRzKQpyZXMgPC0gcmVzdWx0cyhkZHMpCnJlcwpgYGAKCmBgYHtyfQpyZXN1bHRzTmFtZXMoZGRzKQpgYGAKCmBgYHtyfQojIExvZyBGb2xkIENoYW5nZSBTaHJpbmsKcmVzTEZDIDwtIGxmY1NocmluayhkZHMsIGNvZWY9InR5cGVfY2FuY2VyX3ZzX25vbi5jYW5jZXIiLCB0eXBlPSJhcGVnbG0iKQpyZXNMRkMKYGBgCgpgYGB7cn0KcmVzT3JkZXJlZCA8LSByZXNbb3JkZXIocmVzJHB2YWx1ZSksXQpyZXNPcmRlcmVkCmBgYAoKYGBge3J9CkdERSA9IGRhdGEuZnJhbWUocmVzT3JkZXJlZCRsb2cyRm9sZENoYW5nZSwgcmVzT3JkZXJlZCRwdmFsdWUpCnJvd25hbWVzKEdERSkgPSByZXNPcmRlcmVkQHJvd25hbWVzCmNvbG5hbWVzKEdERSkgPSBjKCdMb2cyRm9sZENoYW5nZScsJ09yZGVyZWQkcHZhbHVlJykKR0RFCmBgYAoKYGBge3J9CndyaXRlLmNzdihHREUsICJHREVfT3V0Y29tZS5jc3YiLCByb3cubmFtZXMgPSBUUlVFKQpgYGAKCgo=